**PROGRAM TO REDUCE THE SCOPE OF DATA. WE MADE THIS A SEPARATE PROGRAM FOR BETTER ORGANIZATION. THIS PROGRAM IS USED IN OUTCOME CREATION AND ANALYSIS 


***********************
**Bringing in case status data and restrict complaints data: 
***********************


clear
insheet using "${rep_root}/data/FOIA/CLEAR CR Status Data.csv"
tempfile cr_temp
save `cr_temp'

clear
insheet using "${rep_root}/data/FOIA/CLEAR INFO Status Data.csv"
append using `cr_temp'

tab status

  

//order chronologically
gen ampm = substr(status_time, -2,2)
tab ampm
gen hr = substr(status_time, 1, strpos(status_time,":")-1)
tab hr
gen min = substr(status_time, strpos(status_time,":")+1, 2)
destring hr min, replace
replace hr = hr + 12 if hr<12 & ampm=="PM" 
replace hr = 0 if hr==12 & ampm=="AM"



gen status_time_str = status_date+ " " + status_time
gen status_dt = date(status_date, "MDY", 2025)
gen status_yr = year(status_dt) 
gen status_month = month(status_dt) 
gen dow = dow(status_dt)
sort cr_number status_dt hr min    

**Get intake time for each case
preserve
sort cr_number status_dt hr min
by cr_number: keep if _n==1

keep cr_number status_dt hr min dow
rename status_dt intake_dt
rename hr hr_intake
rename min min_intake
rename dow dow_intake

tempfile intake
save `intake'
restore
 


sort cr_number
merge m:1 cr_number using `intake', nogen


***********************
**GET PERSONNEL IN THE ROLE OF SUPERVISOR
***********************



**Create wide list of staff marked as pending assign team
preserve
keep if status=="PENDING ASSIGN TEAM"

sort cr_number
by cr_number: gen idx = _n
rename created_by intake_sup_

sum idx
local idx_max = r(max)

keep cr_number idx intake_sup_
reshape wide intake_sup_, i(cr_number) j(idx)

tempfile intake_wide
save `intake_wide'
restore



***********************
**REMOVE PERSONNEL WHO ARE NOT REALLY SUPERVISORS USING A VERIFIED LIST
***********************
 

//bring in list of investigating supervisors, and filter out those that are not. 
rename created_by status_sup  
replace status_sup = "SHANNON B HAYES" if status_sup == "SHANNON HAYES" //correct data error in name
 

preserve
clear
insheet using ${rep_root}/data/supervisors_identified.csv
tempfile sup_identified
save `sup_identified'
restore

merge m:1 status_sup using `sup_identified'
replace sup_identified = 0 if sup_identified !=1 
drop _merge


**identify whether the first supervisor is an identified supervisor
preserve
keep if status == "PENDING INVESTIGATION"
sort cr_number status_dt hr min  
by cr_number: keep if _n == 1
keep cr_number sup_identified
rename sup_identified first_sup 
tempfile first_sup
save `first_sup'
restore
merge m:1 cr_number using `first_sup', nogen
replace first_sup = 0 if first_sup!=1


//Among cases where the first supervisor is NOT identified, what's the number of supervisors? 
preserve
sort cr_number status_dt hr min
keep if first_sup == 0  
keep if status == "PENDING INVESTIGATION"
drop if agency == "BIA"
by cr_number:gen count = _N
by cr_number: keep if _n == 1
tab count
restore

//Among cases where the first supervisor is identified, what's the number of supervisors? 
preserve
sort cr_number status_dt hr min
keep if first_sup == 1
keep if status == "PENDING INVESTIGATION"
drop if agency == "BIA"
by cr_number:gen count = _N
by cr_number: keep if _n == 1
tab count
restore



//take the person who is in the role of "waiting for their investigator to investigate"  

keep if status == "PENDING INVESTIGATION"


 
//take the first supervising investigator
sort cr_number status_dt hr min  
by cr_number: keep if _n == 1


//drop if not an identified supervisor
drop if sup_identified !=1



keep cr_number status_sup intake_dt //sup_identified

tab status_sup

sort cr_number
merge 1:1 cr_number using `intake_wide', keep(1 3) nogen

forvalues i = 1/`idx_max'{
	drop if status_sup==intake_sup_`i'
}


rename cr_number cr_id


sort status_sup 
egen inv_status_id = group(status_sup) 

preserve

duplicates drop inv_status_id status_sup, force
list inv_status_id status_sup
save ${rep_root}/data/sup_id, replace
restore

tempfile status
save `status'


***********************
**MERGE SUPERVISORS WITH COMPLAINTS DATA
***********************

 

use ${rep_root}/data/FOIA/FOIA_unified/complaints, clear
tab finding_cd
merge m:1 cr_id using `status'

keep if _merge == 3
tab finding_cd
drop _merge



***********************
**RESTRICT TO SAMPLES WITH RANDOMIZATION
***********************
 


**Restrict to cases investigated by an IPRA, OPS, or COPA supervisor
keep if inlist(investigating_agency, "IPRA", "OPS", "COPA")
 
tab finding_cd

gen close_yr = year(inc_closed_dt_1)
drop if close_yr==1999

**Restrict to years where cases were being randomly assigned
gen intake_yr = year(intake_dt) 
gen intake_mo = month(intake_dt)

 
 
keep if inrange(intake_yr, 2006, 2007) | inrange(intake_yr, 2015, 2018) | (intake_yr==2008 & intake_mo<=7) | (intake_yr==2014 & intake_mo>=9)   
 
  tab finding_cd
 
 

gen intake_yearmo = ym(intake_yr, intake_mo)
format %tm intake_yearmo 

 

 
 

drop inv_id 
rename inv_status_id inv_id

**Drop shootings, domestic violence, and non-civillian complaints
drop if inc_shooting_1==1 | (vic_inj_1>=3 & !missing(vic_inj_1))
drop if inc_any_domestic==1
drop if inc_civilian_1==0

**Drop if missing crucial data
drop if missing(vic_race_1) | missing(vic_male_1) | missing(acc_white) | missing(acc_male) | missing(acc_age) | missing(inc_district_1) | missing(finding_cd)
tab finding_cd

**Drop non-POs
drop if acc_rank != "POLICE OFFICER"

**Keep only cases where the accused officer made at least one arrest at some point and we have outcome data
sort mid
merge m:1 mid using ${rep_root}/data/mids_with_arrests, keep(3) nogen
